#Preamble
library(dplyr)
library(tidyr)
library(stringr)
library(plyr)
library(pastecs)
library(ggplot2)
library(cregg)
library(tidyverse)
library(broom)
library(data.table)
library(ashr)
library(pastecs)
#Load datasets for analysis. "STAN0156_OUTPUT.csv" is the raw dataset we received from YouGov, which has been reshaped for the purpose of the 
#analysis. In the first section, we report, for transparency, the choices we have made when generating dummy variables to discretize demographic
#characteristics. "standard_df" and "image_df" report data related to choices made by respondents on politicians profiles for the standard conjoint
#experiment and the visual conjoint experiment.
respondents <- read.csv("STAN0156_OUTPUT.csv")
load("standard.Rdata") #profile matching standard
load("image.Rdata") #data analysis image

#Demographic characteristics of respondents
#Age
respondents$age <- 2022 - respondents$birthyr 
respondents$is_boomer <- 0
respondents$is_boomer[respondents$age > 58] <- 1

#Political affiliation
respondents$party_aff <- 2
respondents$party_aff[respondents$pid3 == "Democrat"] <- 1
respondents$party_aff[respondents$pid3 == "Republican"] <- 3

#Education
respondents$high_edu <- 0
respondents$high_edu[respondents$educ_demo == "Post-grad" |
                       respondents$educ_demo == "2-year"|
                       respondents$educ_demo == "4-year"] <- 1

#Race==White
respondents$white <- 0
respondents$white[respondents$race == "White"] <- 1

#Gender
respondents$female <- 0
respondents$female[respondents$gender_demo == "Female"] <- 1



#Twitter use
respondents$twitter_use<-(respondents$social_media_2)
respondents$twitter_use<-gsub("not selected", "0",  respondents$twitter_use)
respondents$twitter_use<-gsub("selected", "1",  respondents$twitter_use)
respondents$twitter_use<-as.factor(respondents$twitter_use)



#Final DF
respondents_df<-select(respondents, twitter_use, age,party_aff, caseid, weight )
respondents_df$caseid <-as.character(respondents_df$caseid)


##Experiment Results
#Standard conjoint
standard_df<- base::merge(standard_conjoint_results, respondents_df, by = "caseid")

#Recoding for clarity Twitter feedback variable
standard_df$Feedback <- as.factor(standard_df$Feedback)
standard_df$Feedback<-recode(standard_df$Feedback, "A few" = 'Low', "A lot" = "High")

#Renaming for clarity Religion
standard_df$Religion<-standard_df$Ideology

#Transforming to factor profile variables
standard_df[,c( "Gender" , "Race" , "Generation" , "Religion",  
           "Profession" , "Education" , "Military",  "Party" , "Feedback")] <- 
            lapply(standard_df[,c("Gender" , "Race" , "Generation" , "Religion", 
                                  "Profession" , "Education" , "Military", "Party", "Feedback")], as.factor) 
#marking reference levels to "No religion"
standard_df$Religion <- relevel(standard_df$Religion, ref = "No Religion")

##Standard Conjoint AMCE results 
#estimation function
f1 <- choice ~ Gender + Race + Generation + Religion + 
  Profession + Education + Military + Party + Feedback

#Model estimation
amce_model_standard <-amce(standard_df, f1,
                  id = ~ caseid, weights = ~ weight)            



#Image conjoint
image_df<- base::merge(image_results, respondents_df, by = "caseid")

#Transforming characteristics as factors
image_df[,c("gender", "race", "age", "edu", "mil", "party", "feed", "prof", "relig", "tweet1", "tweet2")] <- 
        lapply(image_df[,c("gender", "race", "age.x", "edu", "mil", "party", "feed", "prof", "relig", "tweet1", "tweet2")], as.factor)

#Leveling Gender reference category as "Female"
levels(image_df$gender) <- c("Male", "Female")
image_df$gender <- relevel(image_df$gender, ref = "Female") 

#Leveling Gender reference category as "Black"
levels(image_df$race) <- c("White", "Black")
image_df$race <- relevel(image_df$race, ref = "Black") 

#Leveling Education reference category as "College"
levels(image_df$edu) <- c("High School", "College", "Ivy League")
image_df$edu <- relevel(image_df$edu, ref = "College") 

#Leveling Age reference category as "Boomer
levels(image_df$age) <- c("Boomer", "Millennial")
image_df$age <- relevel(image_df$age, ref = "Boomer") 

#No re-leveling needed, default reference category correct
levels(image_df$mil) <- c("Not Served", "Served")
levels(image_df$party) <- c("Republican", "Democrat")

levels(image_df$feed) <- c("Low", "High")
levels(image_df$prof) <- c("Used Car Dealer", "Doctor", "Farmer", "Lawyer", "Entrepreneur", "Teacher")

#Releveling reference category "Doctor"
image_df$prof <- relevel(image_df$prof, ref = "Doctor") 

#Leveling Religion reference category as "No Religion"
levels(image_df$relig) <- c("No Religion", "Catholic", "Jewish", "Mormon", "Protestant")
image_df$relig <- relevel(image_df$relig, ref = "No Religion") 

levels(image_df$party) <- c("Republican", "Democrat")
image_df$party <- relevel(image_df$party, ref = "Democrat") 

#Leveling Tweets categories
levels(image_df$tweet1) <- c("0", "1")
levels(image_df$tweet2) <- c("3", "4")


#Estimation equation
f2 <- choice ~ gender + race +  age + relig + prof + edu +  mil + party  + feed + tweet1 + tweet2

#Model estimation
amce_model_image <-amce(image_df, f2,                
                        id = ~ caseid,
                        weights = ~ weight.x,
                        feature_labels = list(gender ="Gender", race = "Race", edu = "Education", age ="Generation", mil = "Military", party = "Party", feed = "Feedback", prof = "Profession", relig = "Religion", tweet1 = "Tweet1", tweet2 = "Tweet2")
)

#Removing results for Tweet. We do not display results for tweets, but given that they were part of the treatment we maintain them in the estimation equation as controls.
amce_model_image_reduced <- amce_model_image[1:26,]

#Data preparation for Coefficient plot, Fig. 2
amce_model_image_reduced$model <- "Image"
amce_model_standard$model <- "Standard"
graph_data <- bind_rows(amce_model_image_reduced, amce_model_standard)
graph_data$model[is.na(graph_data$std.error)] <- "Reference"

#Coefficient plot dataset
graph_data <- graph_data %>% 
  arrange(factor(feature, levels = c("Gender", "Race", "Party", "Generation", "Education", "Religion", "Profession", "Military", "Feedback"))) %>%
  mutate(level = factor(level, levels = c("Male", "Female", "White", "Black", "Republican", "Democrat", "Millennial", "Boomer", "Ivy League", "High School",  "College", "Catholic", "Jewish", "Mormon", "Protestant", 
                                          "No Religion","Used Car Dealer", "Farmer", "Lawyer", "Entrepreneur", "Teacher", "Doctor", 
                                          "Served", "Not Served",  "High", "Low")))   

#Coef plot mean estimate and SD
graph_data$lower <- graph_data$estimate - 1.96*graph_data$std.error
graph_data$upper <- graph_data$estimate + 1.96*graph_data$std.error

#Coefficient plot, Fig. 2
ggplot(aes(y= level, x = estimate, colour = model), data = graph_data) +
  geom_vline(xintercept = 0, linetype = 2) +
  geom_point(position = ggstance::position_dodgev(height = 0.75))+
  geom_errorbar(aes(y = level, xmin = lower, xmax = upper, colour = model), width =- 0.75, position = ggstance::position_dodgev(height = 0.75), data = graph_data)+
  labs(x = "AMCE", y = "Conjoint Features", colour = "Model") +
  facet_grid(factor(feature, levels = c("Gender", "Race", "Party", "Generation", "Education", "Religion", "Profession", "Military", "Feedback"))~., scales = "free", space = "free")+
  theme(text = element_text(size = 12))

#Multiple hypothesis ASH correction
ash_standard<-ash(amce_model_standard$estimate, amce_model_standard$std.error)
amce_model_ash<-ash_standard$result
ash_image<-ash(amce_model_image$estimate, amce_model_image$std.error)
amce_model_image_ash<-ash_image$result
amce_model_image_reduced_ash <- amce_model_image_ash[1:26,]

#Data preparation for Coefficient plot (Ash-adjusted), Fig. 3
amce_model_standard$PosteriorMean <- amce_model_ash$PosteriorMean 
amce_model_standard$PosteriorSD <-amce_model_ash$PosteriorSD
amce_model_image_reduced$PosteriorMean <- amce_model_image_reduced_ash$PosteriorMean 
amce_model_image_reduced$PosteriorSD <-amce_model_image_reduced_ash$PosteriorSD

amce_model_image_reduced$model <- "Image"
amce_model_standard$model <- "Standard"

#Removing reference categories estimate (Notice the Ash estimate is the posterior estimates.)
graph_data_ash <- bind_rows(amce_model_image_reduced, amce_model_standard)
graph_data_ash$model[is.na(graph_data_ash$std.error)] <- "Reference"
#Coefficient plot dataset
graph_data_ash <- graph_data_ash %>% 
  arrange(factor(feature, levels = c("Gender", "Race", "Party", "Generation", "Education", "Religion", "Profession", "Military", "Feedback"))) %>%
  mutate(level = factor(level, levels = c("Male", "Female", "White", "Black", "Republican", "Democrat", "Millennial", "Boomer", "Ivy League", "High School",  "College", "Catholic", "Jewish", "Mormon", "Protestant", 
                                          "No Religion","Used Car Dealer", "Farmer", "Lawyer", "Entrepreneur", "Teacher", "Doctor", 
                                          "Served", "Not Served",  "High", "Low")))   

#Coef plot mean estimate and SD
graph_data_ash$PosteriorSD[graph_data_ash$level %in% c("Female","Black", "Democrat", "Boomer", "College", "No Religion", "Doctor", "Not Served", "Low")] <-NA
graph_data_ash$lower <- graph_data_ash$PosteriorMean - 1.96*graph_data_ash$PosteriorSD
graph_data_ash$upper <- graph_data_ash$PosteriorMean + 1.96*graph_data_ash$PosteriorSD

#Coefficient plot, Fig. 3
ggplot(aes(y= level, x = PosteriorMean, colour = model), data = graph_data_ash) +
  geom_vline(xintercept = 0, linetype = 2) +
  geom_point(position = ggstance::position_dodgev(height = 0.75))+
  geom_errorbar(aes(y = level, xmin = lower, xmax = upper, colour = model), width =- 0.75, position = ggstance::position_dodgev(height = 0.75), data = graph_data_ash)+
  labs(x = "AMCE", y = "Conjoint Features", colour = "Model") +
  facet_grid(factor(feature, levels = c("Gender", "Race", "Party", "Generation", "Education", "Religion", "Profession", "Military", "Feedback"))~., scales = "free", space = "free")+
  theme(text = element_text(size = 12))


#Computing statistical significance in difference in estimates across Standard and Visual Conjoint (Fig.4)
diff_estimates<-amce_model_ash$PosteriorMean-amce_model_image_reduced_ash$PosteriorMean
sd_estimates <-sqrt(amce_model_ash$PosteriorSD^2+amce_model_image_reduced_ash$PosteriorSD^2)

diff_df <-data.frame(amce_model_standard$feature, amce_model_standard$level, diff_estimates, sd_estimates)
diff_df$sd_estimates[diff_df$amce_model_standard.level %in% c("Female","Black", "Democrat", "Boomer", "College", "No Religion", "Doctor", "Not Served", "Low")] <-NA
diff_df$z <- (diff_df$diff_estimates-0)/diff_df$sd_estimates
diff_df$pvalue <- 2*pnorm(-abs(diff_df$z))
diff_df$upper <- diff_df$diff_estimates+1.96*diff_df$sd_estimates
diff_df$lower <- diff_df$diff_estimates-1.96*diff_df$sd_estimates

#Coefficient plot data
graph_data_diff <- diff_df %>% 
  arrange(factor(amce_model_standard.feature, levels = c("Gender", "Race", "Party", "Generation", "Education", "Religion", "Profession", "Military", "Feedback"))) %>%
  mutate(level = factor(amce_model_standard.level, levels = c("Male", "Female", "White", "Black", "Republican", "Democrat", "Millennial", "Boomer", "Ivy League", "High School",  "College", "Catholic", "Jewish", "Mormon", "Protestant", 
                                                     "No Religion","Used Car Dealer", "Farmer", "Lawyer", "Entrepreneur", "Teacher", "Doctor", 
                                                     "Served", "Not Served",  "High", "Low")))   
#Coefficient plot image Figure 4
ggplot(aes(y= level, x = diff_estimates), data = graph_data_diff) +
  geom_vline(xintercept = 0, linetype = 2) +
  geom_point(position = ggstance::position_dodgev(height = 0.75))+
  geom_errorbar(aes(y = amce_model_standard.level, xmin = lower, xmax = upper), width =- 0.75, data = graph_data_diff)+
  labs(x = "Standard - Visual AMCE", y = "Conjoint Features") +
  facet_grid(factor(amce_model_standard.feature, level = c("Gender", "Race", "Party", "Generation", "Education", "Religion", "Profession", "Military", "Feedback"))~., scales = "free", space = "free")+  
  theme(text = element_text(size = 12))



#Disaggregating results by respondent generation
standard_df<- base::merge(standard_conjoint_results, respondents_df, by = "caseid")
standard_df$generation <- 3 #Respondents identified as GenX or older
standard_df$generation[standard_df$age < 58] <- 2 #Respondents identified as GenX or older
standard_df$generation[standard_df$age < 42] <- 1 #Respondent identifies as Millennial or younger
standard_df$generation[standard_df$age < 26] <- 0 #Respondent identifies as Millennial or younger
standard_df$generation<-as.factor(standard_df$generation)
levels(standard_df$generation) <- c("Gen Z", "Millennial", "Gen X", "Boomer+")
standard_df$generation <- recode_factor(standard_df$generation , "Gen Z" = "Millennial or younger", 
                                              "Millennial" = "Millennial or younger",
                                              "Gen X" =  "Gen X or older",
                                              "Boomer+" = "Gen X or older")

standard_df$Feedback <- as.factor(standard_df$Feedback)
standard_df$Feedback<-recode(standard_df$Feedback, "A few" = 'Low', "A lot" = "High")

#Renaming for clarity Religion
standard_df$Religion<-standard_df$Ideology

#Transforming to factor profile variables
standard_df[,c( "Gender" , "Race" , "Generation" , "Religion",  
                "Profession" , "Education" , "Military",  "Party" , "Feedback")] <- 
  lapply(standard_df[,c("Gender" , "Race" , "Generation" , "Religion", 
                        "Profession" , "Education" , "Military", "Party", "Feedback")], as.factor) 
#marking reference levels to "No religion"
standard_df$Religion <- relevel(standard_df$Religion, ref = "No Religion")

#Standard conjoint estimation of marginal means, disaggregating by generation
f1 <- choice ~ Gender + Race + Generation + Religion + 
  Profession + Education + Military + Party + Feedback

standard_generation <-cj(standard_df, f1, id = ~ caseid, weights = ~ weight, estimate = "mm",
                         by = ~ generation)

#Correcting results for multiple hypothesis testing with ASH method
standard_generation_ash <-ash(standard_generation$estimate, standard_generation$std.error)
standard_generation_ash <- standard_generation_ash$result
standard_generation$PosteriorMean <-standard_generation_ash$PosteriorMean
standard_generation$PosteriorSD <-standard_generation_ash$PosteriorSD


#Dissagregating image results by Generation
#Relabeling age by "Millennial or younger", "GenX or older"
image_df$generation <- 1
image_df$generation[image_df$age.y < 42] <- 0
image_df$generation<-as.factor(image_df$generation)
levels(image_df$generation) <- c("Millennial or younger", "Gen X or older")
image_df$generation <-recode_factor(image_df$generation, "Gen Z" = "Millennial or younger", 
                                          "Millennial" = "Millennial or younger",
                                          "Gen X" =  "Gen X or older",
                                          "Boomer+" = "Gen X or older")

#Marginal Means estimation for image conjoint
image_generation <-cj(image_df, f2, id = ~ caseid, weights = ~ weight.x, estimate = "mm",
                      by = ~ generation)

#Multiple hypothesis estimation correction with Ash method 
image_generation_ash <-ash(image_generation$estimate, image_generation$std.error)
image_generation_ash <- image_generation_ash$result
image_generation$PosteriorMean <-image_generation_ash$PosteriorMean
image_generation$PosteriorSD <-image_generation_ash$PosteriorSD

#Coefficient plot dataframe 
image_generation$model <- "Image"
standard_generation$model <- "Standard"
image_generation$feature <- as.character(image_generation$feature)
image_generation$feature[image_generation$feature == 'relig'] <- "Religion"
image_generation$feature[image_generation$feature == 'prof'] <- "Profession"
image_generation$feature[image_generation$feature == 'party'] <- "Party"
image_generation$feature[image_generation$feature == 'mil'] <- "Military"
image_generation$feature[image_generation$feature == 'gender'] <- "Gender"
image_generation$feature[image_generation$feature == 'feed'] <- "Feedback"
image_generation$feature[image_generation$feature == 'edu'] <- "Education"
image_generation$feature[image_generation$feature == 'age'] <- "Generation"
image_generation$feature[image_generation$feature == 'race'] <- "Race"

image_generation$level <- as.character(image_generation$level)
image_generation$level[image_generation$level == 'No'] <- "Not Served"
image_generation$level[image_generation$level == 'No Relig'] <- "No Religion"
image_generation$level <- as.factor(image_generation$level)

image_generation$feature <- as.factor(image_generation$feature)
image_generation_notw <- filter(image_generation, !str_detect(feature, "^tweet"))
image_generation_notw$feature <- as.factor(image_generation_notw$feature)


generation_graph <-rbind(image_generation_notw, standard_generation)
generation_graph <- filter(generation_graph, PosteriorMean != 0)
generation_graph$comb <- paste0(generation_graph$level, generation_graph$generation)
generation_graph$feat_level<-paste0(generation_graph$feature, ": ", generation_graph$level) 
generation_graph$feat_level <- factor(generation_graph$feat_level, 
                                      levels = c("Feedback: High", "Feedback: Low",
                                                 "Military: Served", "Military: Not Served",
                                                 "Profession: Used Car Dealer", "Profession: Teacher", "Profession: Lawyer", "Profession: Farmer", "Profession: Entrepreneur", "Profession: Doctor",
                                                 "Religion: No Religion", "Religion: Protestant", "Religion: Mormon", "Religion: Jewish", "Religion: Catholic",
                                                 "Education: Ivy League", 
                                                 "Education: College",
                                                 "Education: High School",
                                                 "Generation: Boomer",
                                                 'Generation: Millennial',
                                                 "Party: Democrat",
                                                 "Party: Republican", 
                                                 "Race: Black",
                                                 "Race: White",
                                                 "Gender: Male", "Gender: Female"
                                      ))

generation_graph$lower <-generation_graph$PosteriorMean - 1.96*generation_graph$PosteriorSD
generation_graph$upper <-generation_graph$PosteriorMean + 1.96*generation_graph$PosteriorSD

#Coefficient plot (Figure 5)
ggplot(aes(x = PosteriorMean, y = feat_level, group = model, colour = model), data = generation_graph)  +
  geom_vline(xintercept = 0.5, linetype = 2) +
  geom_point(size = 2, position = ggstance::position_dodgev(height = 0.75))+
  geom_errorbar(aes(y = feat_level, xmin = lower, xmax = upper), width = 0.75, position = ggstance::position_dodgev(height = 0.75)) +
  labs(x = "Marginal Means", y = "Feature: Level", colour = "Model") +
  facet_wrap(generation ~.) +
  theme(text = element_text(size = 12))


#Table 1
table(amce_model_standard$level)

#Sample Descriptive Statistics (Table 2)
respondents <- read.csv("STAN0156_OUTPUT.csv")
respondents$age <- 2022 - respondents$birthyr 

#Gender
respondents$female <- 0
respondents$female[respondents$gender_demo == "Female"] <- 1

#Race==White
respondents$white <- 0
respondents$white[respondents$race == "White"] <- 1

respondents$black <- 0
respondents$black[respondents$race == "Black"] <- 1

respondents$other_race <- 0
respondents$other_race[respondents$race == "Asian" | respondents$race == "Hispanic" |  respondents$race == "Middle Eastern" | respondents$race == "Native American"| respondents$race == "Other" | respondents$race == "Two or more races"] <- 1

respondents$HS_less <- 0
respondents$HS_less[respondents$educ_demo == "No HS" | respondents$educ_demo == "High school graduate"] <- 1

respondents$college <- 0 
respondents$college[respondents$educ_demo == "2-year" | respondents$educ_demo == "Some college"] <- 1

respondents$grad_more <- 0
respondents$grad_more[respondents$educ_demo == "2-year" | respondents$educ_demo == "Post-grad"] <- 1

respondents$strong_rep <- 0
respondents$strong_rep[respondents$pid7 == "Strong Republican"] <- 1

respondents$lean_rep <- 0
respondents$lean_rep[respondents$pid7 == "Not very strong Republican" | respondents$pid7 == "Lean Republican"] <- 1

respondents$indep <- 0
respondents$indep[respondents$pid7 == "Independent"] <- 1

respondents$lean_dem <- 0
respondents$lean_dem[respondents$pid7 == "Not very strong Democrat" | respondents$pid7 == "Lean Democrat"] <- 1

respondents$strong_dem <- 0
respondents$strong_dem[respondents$pid7 == "Strong Democrat"] <- 1

descriptives <- stat.desc(respondents[, c("age", "female", "white", "black", "other_race", "HS_less", "college", "grad_more", "strong_rep", "lean_rep", "indep", "lean_dem", "strong_dem")])

#Table 2
t(round(descriptives, 2))


##Appendix
library(matrixStats)
literacy_var <-c("q2_a", "q2_b", "q2_c", "q2_d", "q2_e", "q2_f")
respondents[literacy_var] <-  lapply(respondents[literacy_var], function(y)      gsub(pattern = "No understanding",
                                                                                      replacement = "1", y))
respondents[literacy_var] <- lapply(respondents[literacy_var], function(y)
  gsub(pattern = "Full understanding",
       replacement = "5", y))

respondents[literacy_var] <- lapply(respondents[literacy_var], function(y) as.numeric(y))
respondents$q2_f <- (6 - respondents$q2_f )
respondents$literacy_index <- rowMedians(as.matrix(respondents[literacy_var]))
respondents$high_literacy <- 0
respondents$high_literacy[respondents$literacy_index > 3.5] <- 1
respondents$high_literacy <- as.factor(respondents$high_literacy)

power_var <-c("q3_a", "q3_b", "q3_c", "q3_d", "q3_e", "q3_f",  "q3_g")
respondents[power_var] <- lapply(respondents[power_var], function(y) 
  gsub(pattern = " \\(Strongly Disagree\\)",
       replacement = "", y))

respondents[power_var] <- lapply(respondents[power_var], function(y) 
  gsub(pattern = " \\(Strongly Agree\\)",
       replacement = "", y))

respondents[power_var] <- lapply(respondents[power_var], function(y) as.numeric(y))

respondents$power_index <- rowMedians(as.matrix(respondents[power_var]))

respondents$high_power <- 0
respondents$high_power[respondents$power_index > -1 ] <- 1
respondents$high_power <- as.factor(respondents$high_power)

respondents_df<-select(respondents, high_power, high_literacy, caseid, weight )
respondents_df$caseid <-as.character(respondents_df$caseid)

standard_df<- base::merge(standard_conjoint_results, respondents_df, by = "caseid")

#Recoding for clarity Twitter feedback variable
standard_df$Feedback <- as.factor(standard_df$Feedback)
standard_df$Feedback<-recode(standard_df$Feedback, "A few" = 'Low', "A lot" = "High")

#Renaming for clarity Religion
standard_df$Religion<-standard_df$Ideology

#Transforming to factor profile variables
standard_df[,c( "Gender" , "Race" , "Generation" , "Religion",  
                "Profession" , "Education" , "Military",  "Party" , "Feedback")] <- 
  lapply(standard_df[,c("Gender" , "Race" , "Generation" , "Religion", 
                        "Profession" , "Education" , "Military", "Party", "Feedback")], as.factor) 
#marking reference levels to "No religion"
standard_df$Religion <- relevel(standard_df$Religion, ref = "No Religion")

f1 <- choice ~ Gender + Race + Generation + Religion + 
  Profession + Education + Military + Party + Feedback

standard_skills <-cj(standard_df, f1, id = ~ caseid, weights = ~ weight, estimate = "mm",
                     by = ~ high_literacy)


#Image conjoint
image_df<- base::merge(image_results, respondents_df, by = "caseid")
#Transforming characteristics as factors
image_df[,c("gender", "race", "age", "edu", "mil", "party", "feed", "prof", "relig", "tweet1", "tweet2")] <- 
  lapply(image_df[,c("gender", "race", "age", "edu", "mil", "party", "feed", "prof", "relig", "tweet1", "tweet2")], as.factor)

#Leveling Gender reference category as "Female"
levels(image_df$gender) <- c("Male", "Female")
image_df$gender <- relevel(image_df$gender, ref = "Female") 

#Leveling Gender reference category as "Black"
levels(image_df$race) <- c("White", "Black")
image_df$race <- relevel(image_df$race, ref = "Black") 

#Leveling Education reference category as "College"
levels(image_df$edu) <- c("High School", "College", "Ivy League")
image_df$edu <- relevel(image_df$edu, ref = "College") 

#Leveling Age reference category as "Boomer
levels(image_df$age) <- c("Boomer", "Millennial")
image_df$age <- relevel(image_df$age, ref = "Boomer") 

#No re-leveling needed, default reference category correct
levels(image_df$mil) <- c("Not Served", "Served")
levels(image_df$party) <- c("Republican", "Democrat")

levels(image_df$feed) <- c("Low", "High")
levels(image_df$prof) <- c("Used Car Dealer", "Doctor", "Farmer", "Lawyer", "Entrepreneur", "Teacher")

#Releveling reference category "Doctor"
image_df$prof <- relevel(image_df$prof, ref = "Doctor") 

#Leveling Religion reference category as "No Religion"
levels(image_df$relig) <- c("No Religion", "Catholic", "Jewish", "Mormon", "Protestant")
image_df$relig <- relevel(image_df$relig, ref = "No Religion") 

levels(image_df$party) <- c("Republican", "Democrat")
image_df$party <- relevel(image_df$party, ref = "Democrat") 

#Leveling Tweets categories
levels(image_df$tweet1) <- c("0", "1")
levels(image_df$tweet2) <- c("3", "4")


f2 <- choice ~ gender + race +  age + relig + prof + edu +  mil + party  +   feed + tweet1 + tweet2

image_skills <-cj(image_df, f2, id = ~ caseid, weights = ~ weight.x, estimate = "mm",
                  #  feature_labels = list(gender ="Gender", race = "Race", age ="Age", feedback = "Feedback", drs ="DRS"),
                  by = ~ high_literacy)


image_skills$model <- "Image"
standard_skills$model <- "Standard"
image_skills$feature <- as.character(image_skills$feature)
image_skills$feature[image_skills$feature == 'relig'] <- "Religion"
image_skills$feature[image_skills$feature == 'prof'] <- "Profession"
image_skills$feature[image_skills$feature == 'party'] <- "Party"
image_skills$feature[image_skills$feature == 'mil'] <- "Military"
image_skills$feature[image_skills$feature == 'gender'] <- "Gender"
image_skills$feature[image_skills$feature == 'feed'] <- "Feedback"
image_skills$feature[image_skills$feature == 'edu'] <- "Education"
image_skills$feature[image_skills$feature == 'age'] <- "Generation"
image_skills$feature[image_skills$feature == 'race'] <- "Race"
image_skills$level <- as.character(image_skills$level)
image_skills$level[image_skills$level == "No"] <- "Not Served"
image_skills$level[image_skills$level == "No Relig"] <- "No Religion"
image_skills$feature <- as.factor(image_skills$feature)
image_skills$level <- as.factor(image_skills$level)

image_skills_notw <- filter(image_skills, !str_detect(feature, "^tweet"))
image_skills_notw$feature <- as.factor(image_skills_notw$feature)


skills_graph <-rbind(image_skills_notw, standard_skills)
skills_graph$high_literacy<- recode(skills_graph$high_literacy, "0" = "Low Literacy", "1" = "High Literacy")
skills_graph[skills_graph$high_literacy == "1"] <- "High Literacy"
skills_graph$feat_level<-paste0(skills_graph$feature, ": ", skills_graph$level) 
skills_graph$feat_level <- factor(skills_graph$feat_level, 
                                  levels = c("Feedback: High", "Feedback: Low",
                                             "Military: Served", "Military: Not Served",
                                             "Profession: Used Car Dealer", "Profession: Teacher", "Profession: Lawyer", "Profession: Farmer", "Profession: Entrepreneur", "Profession: Doctor",
                                             "Religion: No Religion", "Religion: Protestant", "Religion: Mormon", "Religion: Jewish", "Religion: Catholic",
                                             "Education: Ivy League", 
                                             "Education: College",
                                             "Education: High School",
                                             "Generation: Boomer",
                                             'Generation: Millennial',
                                             "Party: Democrat",
                                             "Party: Republican", 
                                             "Race: Black",
                                             "Race: White",
                                             "Gender: Male", "Gender: Female"
                                  ))

#Appendix B, Figure 1(a)
ggplot(aes(x = estimate, y = feat_level, group = model, colour = model), data = skills_graph)  +
  geom_vline(xintercept = 0.5, linetype = 2) +
  geom_point(size = 2, position = ggstance::position_dodgev(height = 0.75))+
  geom_errorbar(aes(y = feat_level, xmin = lower, xmax = upper), width = 0.75, position = ggstance::position_dodgev(height = 0.75)) +
  labs(x = "Marginal Means", y = "Feature: Level", colour = "Model") +
  facet_wrap(high_literacy ~.) +
  theme(text = element_text(size = 12))


##Power
standard_power <-cj(standard_df, f1, id = ~ caseid, weights = ~ weight, estimate = "mm",
                    #  feature_labels = list(gender ="Gender", race = "Race", age ="Age", feedback = "Feedback", drs ="DRS"),
                    by = ~ high_power) 


image_power <-cj(image_df, f2, id = ~ caseid, weights = ~ weight.x, estimate = "mm",
                 #  feature_labels = list(gender ="Gender", race = "Race", age ="Age", feedback = "Feedback", drs ="DRS"),
                 by = ~ high_power)

image_power$model <- "Image"
standard_power$model <- "Standard"
image_power$feature <- as.character(image_power$feature)
image_power$feature[image_power$feature == 'relig'] <- "Religion"
image_power$feature[image_power$feature == 'prof'] <- "Profession"
image_power$feature[image_power$feature == 'party'] <- "Party"
image_power$feature[image_power$feature == 'mil'] <- "Military"
image_power$feature[image_power$feature == 'gender'] <- "Gender"
image_power$feature[image_power$feature == 'feed'] <- "Feedback"
image_power$feature[image_power$feature == 'edu'] <- "Education"
image_power$feature[image_power$feature == 'age'] <- "Generation"
image_power$feature[image_power$feature == 'race'] <- "Race"
image_power$level <- as.character(image_power$level)
image_power$level[image_power$level == 'No'] <- "Not Served"
image_power$level[image_power$level == 'No Relig'] <- "No Religion"
image_power$feature <- as.factor(image_power$feature)
image_power$level <- as.factor(image_power$level)
image_power_notw <- filter(image_power, !str_detect(feature, "^tweet"))
image_power_notw$feature <- as.factor(image_power_notw$feature)


power_graph <-rbind(image_power_notw, standard_power)
power_graph <- filter(power_graph, estimate != 0)
power_graph$high_power<- recode(power_graph$high_power, "0" = "Low Literacy", "1" = "High Literacy")
power_graph[power_graph$high_power == "1"] <- "High Literacy"
power_graph$comb <- paste0(power_graph$level, power_graph$power)
power_graph$feat_level<-paste0(power_graph$feature, ": ", power_graph$level) 
power_graph$feat_level <- factor(power_graph$feat_level, 
                                 levels = c("Feedback: High", "Feedback: Low",
                                            "Military: Served", "Military: Not Served",
                                            "Profession: Used Car Dealer", "Profession: Teacher", "Profession: Lawyer", "Profession: Farmer", "Profession: Entrepreneur", "Profession: Doctor",
                                            "Religion: No Religion", "Religion: Protestant", "Religion: Mormon", "Religion: Jewish", "Religion: Catholic",
                                            "Education: Ivy League", 
                                            "Education: College",
                                            "Education: High School",
                                            "Generation: Boomer",
                                            'Generation: Millennial',
                                            "Party: Democrat",
                                            "Party: Republican", 
                                            "Race: Black",
                                            "Race: White",
                                            "Gender: Male", "Gender: Female"
                                 ))

#Appendix B, Figure 1(b)
ggplot(aes(x = estimate, y = feat_level, group = model, colour = model), data = power_graph)  +
  geom_vline(xintercept = 0.5, linetype = 2) +
  geom_point(size = 2, position = ggstance::position_dodgev(height = 0.75))+
  geom_errorbar(aes(y = feat_level, xmin = lower, xmax = upper), width = 0.75, position = ggstance::position_dodgev(height = 0.75)) +
  labs(x = "Marginal Means", y = "Feature: Level", colour = "Model") +
  facet_wrap(high_power ~.) +
  theme(text = element_text(size = 12))


#Appendix C Distribution of profile
f <-cj_freqs(standard_df, f1)
plot(f)